## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
## Warning: package 'DESeq2' was built under R version 4.0.3
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.0.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.0.3
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 4.0.3
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.0.5
## Loading required package: SummarizedExperiment
## Warning: package 'SummarizedExperiment' was built under R version 4.0.3
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.0.3
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Warning: package 'phyloseq' was built under R version 4.0.3
##
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
##
## distance
## The following object is masked from 'package:Biobase':
##
## sampleNames
## The following object is masked from 'package:GenomicRanges':
##
## distance
## The following object is masked from 'package:IRanges':
##
## distance
# reading genetic distances matrix
ibs <- as.matrix(read.table("clresult.ibsMat"))
bam.ids <- read.table("bamscl copy")
#remove extra crap from ids
bam.ids$V2 <- gsub(".trim.bt2.bam","",bam.ids$V1)
bam.ids$V2 <- gsub("I","-B",bam.ids$V2)
bam.ids$V2 <- gsub("O","-F",bam.ids$V2)
bam.ids$V2 <- gsub("T","TNW",bam.ids$V2)
row.names(ibs) <- c(bam.ids$V2)
colnames(ibs) <- c(bam.ids$V2)
# computing ordination
ibsord=capscale(as.dist(ibs)~1)
# plotting (for fun)
plot(ibsord)# choosing only the interesting eigenvectors to correlate with gene expression
ibs.scores=scores(ibsord,choices=c(1:4),"sites")Processing 16s table
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## class: DESeqDataSet
## dim: 223 92
## metadata(1): version
## assays(1): counts
## rownames(223): sq1 sq2 ... sq325 sq327
## rowData names(0):
## colnames(92): A1 A10 ... H8 H9
## colData names(9): row col ... zone site_zone
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## [1] 223 92
# reading gene expression table (vsd) - can be a variance-stabilized
# OTU counts table
ge <- t(diagvst)
#sample data - has to match the ibs names
samdf <- data.frame(ps.trim@sam_data)
samdf$ibsnames <- paste(samdf$site_zone,"_",samdf$sample)
samdf$ibsnames <- gsub(" ","",samdf$ibsnames)
row.names(samdf) <- samdf$id
ge.names <- merge(samdf,ge,by=0)
row.names(ge.names) <- ge.names$ibsnames
ge.names.match <- ge.names[,12:234]
#subset the ones that match
ibs.scores.subset <- ibs.scores[row.names(ibs.scores) %in% c(samdf$ibsnames),]
ge.subset <- ge.names.match[row.names(ge.names.match) %in% c(row.names(ibs.scores.subset)),]
# testing how much GE variation is explained by IBS eigenvectors
row.names(ge.subset) == row.names(ibs.scores.subset)## [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
ge.subset.arr <- ge.subset[order(as.character(row.names(ge.subset))),]
ibs.scores.arr <- ibs.scores.subset[order(as.character(row.names(ibs.scores.subset))),]
row.names(ge.subset.arr) == row.names(ibs.scores.arr)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
ge.ibs <- rda(ge.subset.arr~ibs.scores.arr)
# plotting correlation of ibs.scores with gene expression ordination
plot(rda(ge.subset.arr~ibs.scores.arr))## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = ge.subset.arr ~ ibs.scores.arr)
## Df Variance F Pr(>F)
## Model 4 40.49 1.0135 0.429
## Residual 79 788.96
#following instructions from here:
#https://jkzorz.github.io/2019/07/08/mantel-test.html
#& here:
#https://www.rdocumentation.org/packages/ape/versions/5.4-1/topics/mantel.test
otu.dist <- vegdist(ge.subset.arr, method="bray")## Warning in vegdist(ge.subset.arr, method = "bray"): results may be meaningless
## because data have negative entries in method "bray"
ibs.subset1 <- ibs[row.names(ibs) %in% c(samdf$ibsnames),]
ibs.subset <- ibs.subset1[,colnames(ibs.subset1) %in% c(samdf$ibsnames)]
row.names(ge.subset.arr) == row.names(ibs.subset)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
ibs.dist <- as.matrix(ibs.subset)
host.bac.mant <- mantel(ibs.dist,otu.dist, method="spearman",permutations=999,na.rm=TRUE)
host.bac.mant #no dice##
## Mantel statistic based on Spearman's rank correlation rho
##
## Call:
## mantel(xdis = ibs.dist, ydis = otu.dist, method = "spearman", permutations = 999, na.rm = TRUE)
##
## Mantel statistic r: -0.01399
## Significance: 0.599
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0783 0.0955 0.1122 0.1338
## Permutation: free
## Number of permutations: 999
ps.its <- readRDS("ps.its2 copy.RDS")
otu.its <- data.frame(ps.its@otu_table)
# diagdds = phyloseq_to_deseq2(ps.its, ~zone)
# diagdds
#
# diagdds = estimateSizeFactors(diagdds)
# diagdds = estimateDispersions(diagdds)
# diagvst = getVarianceStabilizedData(diagdds)
# dim(diagvst)
# reading gene expression table (vsd) - can be a variance-stabilized
# OTU counts table
its.t <- t(otu.its)
#sample data - has to match the ibs names
samdf.its <- data.frame(ps.its@sam_data)
samdf.its$sample_full <- gsub(" ","_",samdf.its$sample_full)
#row.names(samdf.its) <- samdf.its$sample_full
its.names <- merge(samdf.its,otu.its,by=0)
row.names(its.names) <- its.names$sample_full
its.names.match <- its.names[,13:21]
#subset the ones that match
ibs.scores.sub.its <- ibs.scores[row.names(ibs.scores) %in% c(samdf.its$sample_full),]
its.subset <- its.names.match[row.names(its.names.match) %in% c(row.names(ibs.scores.sub.its)),]
# testing how much GE variation is explained by IBS eigenvectors
row.names(its.subset) == row.names(ibs.scores.sub.its)## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE
its.subset.arr <- its.subset[order(as.character(row.names(its.subset))),]
ibs.scores.arr.its <- ibs.scores.sub.its[order(as.character(row.names(ibs.scores.sub.its))),]
row.names(its.subset.arr) == row.names(ibs.scores.arr.its)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
its.ibs <- rda(its.subset.arr~ibs.scores.arr.its)
# plotting correlation of ibs.scores with gene expression ordination
plot(rda(its.subset.arr~ibs.scores.arr.its))## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = its.subset.arr ~ ibs.scores.arr.its)
## Df Variance F Pr(>F)
## Model 4 267778 0.5643 0.877
## Residual 80 9489899
#following instructions from here:
#https://jkzorz.github.io/2019/07/08/mantel-test.html
#& here:
#https://www.rdocumentation.org/packages/ape/versions/5.4-1/topics/mantel.test
its.dist <- vegdist(its.subset.arr, method="bray")
ibs.subset1.its <- ibs[row.names(ibs) %in% c(samdf.its$sample_full),]
ibs.subset.its <- ibs.subset1.its[,colnames(ibs.subset1.its) %in% c(samdf.its$sample_full)]
row.names(its.subset.arr) == row.names(ibs.subset.its)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
ibs.dist <- as.matrix(ibs.subset.its)
host.sym.mant <- mantel(ibs.dist,its.dist, method="spearman",permutations=999,na.rm=TRUE)
host.sym.mant #no dice##
## Mantel statistic based on Spearman's rank correlation rho
##
## Call:
## mantel(xdis = ibs.dist, ydis = its.dist, method = "spearman", permutations = 999, na.rm = TRUE)
##
## Mantel statistic r: -0.02606
## Significance: 0.819
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0358 0.0480 0.0553 0.0639
## Permutation: free
## Number of permutations: 999
ps.its <- readRDS("ps.its2 copy.RDS")
load("ps.trim copy.Rdata")
its.otu <- data.frame(ps.its@otu_table)
bac.otu <- data.frame(ps.trim@otu_table)
samdf.bac <- data.frame(ps.trim@sam_data)
#sample names have to match
row.names(bac.otu) == row.names(samdf.bac)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE
row.names(bac.otu) <- samdf.bac$sample
#subset the ones that match
its.sub <- its.otu[row.names(its.otu) %in% row.names(bac.otu),]
bac.sub <- bac.otu[row.names(bac.otu) %in% row.names(its.sub),]
# testing how much GE variation is explained by IBS eigenvectors
row.names(its.sub) == row.names(bac.sub)## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE
its.sub.arr <- its.sub[order(as.character(row.names(its.sub))),]
bac.sub.arr <- bac.sub[order(as.character(row.names(bac.sub))),]
row.names(its.sub.arr) == row.names(bac.sub.arr)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
its.dist <- vegdist(its.sub.arr)
itsord=capscale(as.dist(its.dist)~1)
# plotting (for fun)
plot(itsord)# choosing only the interesting eigenvectors to correlate with gene expression
itsord.scores=scores(itsord,choices=c(1:4),"sites")
its.bac.rda <- rda(bac.sub.arr~itsord.scores)
# plotting correlation of ibs.scores with gene expression ordination
plot(its.bac.rda)## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = bac.sub.arr ~ itsord.scores)
## Df Variance F Pr(>F)
## Model 4 6256273 1.6187 0.113
## Residual 84 81167242
# stats
its.dist <- vegdist(its.sub.arr, method="bray")
bac.dist <- vegdist(bac.sub.arr, method="bray")
bac.sym.mant <- mantel(bac.dist,its.dist, method="spearman",permutations=999,na.rm=TRUE)
bac.sym.mant##
## Mantel statistic based on Spearman's rank correlation rho
##
## Call:
## mantel(xdis = bac.dist, ydis = its.dist, method = "spearman", permutations = 999, na.rm = TRUE)
##
## Mantel statistic r: 0.03884
## Significance: 0.105
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0409 0.0522 0.0636 0.0848
## Permutation: free
## Number of permutations: 999
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following object is masked from 'package:S4Vectors':
##
## first
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## Loading required package: carData
##
## Attaching package: 'nlme'
## The following object is masked from 'package:IRanges':
##
## collapse
Read in data files, designate factors
mast <- read.csv('mr_new_master_meta.csv',na.strings=c("","NA"),check.names=FALSE)
mast$site <- as.factor(mast$site)
mast$zone <- as.factor(mast$zone)
mast$site_zone <- as.factor(mast$site_zone)
mast$sym_clade <- as.factor(mast$sym_clade)
mast$sym_type <- as.factor(mast$sym_type)
mast$sym_type_top <- as.factor(mast$sym_type_top)
str(mast)## 'data.frame': 143 obs. of 16 variables:
## $ Sample name : chr "MNW-B_51" "MNW-B_53" "MNW-B_55" "MNW-B_56" ...
## $ sample_num : chr "51" "53" "55" "56" ...
## $ site : Factor w/ 3 levels "MNW","MSE","TNW": 1 1 1 1 1 1 1 1 1 1 ...
## $ zone : Factor w/ 2 levels "Back reef","Fore reef": 1 1 1 1 1 1 1 1 1 1 ...
## $ site_zone : Factor w/ 6 levels "MNW-B","MNW-F",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ 2b-RAD sequencer: chr "HiSeq4000" "HiSeq4000" "HiSeq4000" "HiSeq4000" ...
## $ size_cm2 : num 1244 371 249 441 514 ...
## $ host_het : num 0.00327 0.00311 0.00359 0.00395 NA ...
## $ sym_type : Factor w/ 9 levels "A1.A1ee","A1.A1gf.A1g",..: 4 NA 4 NA NA 3 NA 4 4 4 ...
## $ sym_clade : Factor w/ 2 levels "A","C": 2 NA 2 NA NA 1 NA 2 2 2 ...
## $ sym_type_top : Factor w/ 6 levels "A1","A3","C3ae",..: 3 NA 3 NA NA 2 NA 3 3 3 ...
## $ bac_shannon : num 1.43 NA 1.98 NA NA ...
## $ bac_simpson : num 2.58 NA 3.77 NA NA ...
## $ bac_rich : int 57 NA 50 NA NA 43 NA 76 85 43 ...
## $ bac_even : num 0.354 NA 0.506 NA NA ...
## $ bac_phylo : num 18.4 NA 13.9 NA NA ...
Nada
##
## Shapiro-Wilk normality test
##
## data: mast$size_cm2
## W = 0.52031, p-value < 2.2e-16
##
## Shapiro-Wilk normality test
##
## data: mast$host_het
## W = 0.93948, p-value = 6.15e-05
best.size <- bestNormalize(mast$size_cm2)
mast$size.t <- best.size$x.t
plot(size.t~site_zone,data=mast)best.het <- bestNormalize(mast$host_het)
mast$het.t <- best.het$x.t
a1 <- aov(size.t~site/zone,data=mast)
TukeyHSD(a1)#tahiti different than other two## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = size.t ~ site/zone, data = mast)
##
## $site
## diff lwr upr p adj
## MSE-MNW -0.3898905 -0.9001095 0.120328436 0.1690691
## TNW-MNW -0.9014607 -1.3088958 -0.494025636 0.0000023
## TNW-MSE -0.5115702 -1.0143403 -0.008800031 0.0451650
##
## $`site:zone`
## diff lwr upr p adj
## MSE:Back reef-MNW:Back reef -0.9610373 -1.6722503 -0.24982432 0.0021230
## TNW:Back reef-MNW:Back reef -1.0388392 -1.7417336 -0.33594479 0.0005590
## MNW:Fore reef-MNW:Back reef -1.1994082 -1.9196607 -0.47915577 0.0000667
## MSE:Fore reef-MNW:Back reef NA NA NA NA
## TNW:Fore reef-MNW:Back reef -1.8702284 -2.5583234 -1.18213339 0.0000000
## TNW:Back reef-MSE:Back reef -0.0778019 -0.7890149 0.63341110 0.9995576
## MNW:Fore reef-MSE:Back reef -0.2383709 -0.9667437 0.49000190 0.9322267
## MSE:Fore reef-MSE:Back reef NA NA NA NA
## TNW:Fore reef-MSE:Back reef -0.9091911 -1.6057814 -0.21260075 0.0033699
## MNW:Fore reef-TNW:Back reef -0.1605690 -0.8808215 0.55968345 0.9870251
## MSE:Fore reef-TNW:Back reef NA NA NA NA
## TNW:Fore reef-TNW:Back reef -0.8313892 -1.5194842 -0.14329417 0.0085286
## MSE:Fore reef-MNW:Fore reef NA NA NA NA
## TNW:Fore reef-MNW:Fore reef -0.6708202 -1.3766372 0.03499692 0.0724064
## TNW:Fore reef-MSE:Fore reef NA NA NA NA
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = het.t ~ site/zone, data = mast)
##
## $site
## diff lwr upr p adj
## MSE-MNW -0.5564720 -1.1150277 0.00208358 0.0510867
## TNW-MNW 0.1631006 -0.3681829 0.69438406 0.7465281
## TNW-MSE 0.7195726 0.2199059 1.21923927 0.0025100
##
## $`site:zone`
## diff lwr upr p adj
## MSE:Back reef-MNW:Back reef -0.36976674 -1.3800907 0.6405572 0.8952596
## TNW:Back reef-MNW:Back reef 0.33333940 -0.6225580 1.2892368 0.9130871
## MNW:Fore reef-MNW:Back reef -0.01558811 -1.0382974 1.0071212 1.0000000
## MSE:Fore reef-MNW:Back reef -0.75055238 -1.7496645 0.2485598 0.2558173
## TNW:Fore reef-MNW:Back reef -0.03259056 -0.9957615 0.9305804 0.9999987
## TNW:Back reef-MSE:Back reef 0.70310614 -0.1624007 1.5686130 0.1809483
## MNW:Fore reef-MSE:Back reef 0.35417863 -0.5845953 1.2929526 0.8825268
## MSE:Fore reef-MSE:Back reef -0.38078565 -1.2937956 0.5322243 0.8310626
## TNW:Fore reef-MSE:Back reef 0.33717618 -0.5363571 1.2107095 0.8721230
## MNW:Fore reef-TNW:Back reef -0.34892751 -1.2288604 0.5310054 0.8588682
## MSE:Fore reef-TNW:Back reef -1.08389178 -1.9362841 -0.2314995 0.0046540
## TNW:Fore reef-TNW:Back reef -0.36592995 -1.1758966 0.4440367 0.7784652
## MSE:Fore reef-MNW:Fore reef -0.73496427 -1.6616612 0.1917326 0.2026298
## TNW:Fore reef-MNW:Fore reef -0.01700244 -0.9048315 0.8708266 0.9999999
## TNW:Fore reef-MSE:Fore reef 0.71796183 -0.1425793 1.5785030 0.1583426
#plot(het.t~size.t,data=mast)
ggplot(data=mast,aes(x=log(size_cm2),y=log(host_het),color=zone))+
geom_point()+
facet_wrap(~site)+
stat_smooth(method="lm")## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 51 rows containing non-finite values (stat_smooth).
## Warning: Removed 51 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: mast$size.t
## W = 0.98683, p-value = 0.3641
##
## Shapiro-Wilk normality test
##
## data: mast$het.t
## W = 0.97885, p-value = 0.06799
##
## Call:
## lm(formula = size.t ~ het.t + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83868 -0.48725 0.03821 0.46905 1.67970
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.20436 0.22026 5.468 4.39e-07 ***
## het.t 0.02525 0.08864 0.285 0.776404
## siteMSE -1.08904 0.29056 -3.748 0.000322 ***
## siteTNW -1.23000 0.27988 -4.395 3.15e-05 ***
## siteMNW:zoneFore reef -1.33356 0.29663 -4.496 2.15e-05 ***
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -0.85620 0.23981 -3.570 0.000586 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7932 on 86 degrees of freedom
## (51 observations deleted due to missingness)
## Multiple R-squared: 0.4063, Adjusted R-squared: 0.3718
## F-statistic: 11.77 on 5 and 86 DF, p-value: 1.107e-08
##
## Call:
## lm(formula = het.t ~ size.t + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.16621 -0.71025 -0.04741 0.64329 2.25528
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07711 0.31081 0.248 0.805
## size.t 0.03734 0.13106 0.285 0.776
## siteMSE -0.32875 0.37943 -0.866 0.389
## siteTNW 0.45784 0.37336 1.226 0.223
## siteMNW:zoneFore reef -0.13651 0.40058 -0.341 0.734
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -0.41250 0.30929 -1.334 0.186
##
## Residual standard error: 0.9645 on 86 degrees of freedom
## (51 observations deleted due to missingness)
## Multiple R-squared: 0.07874, Adjusted R-squared: 0.02518
## F-statistic: 1.47 on 5 and 86 DF, p-value: 0.208
##
## Call:
## lm(formula = host_het ~ size_cm2 + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.031e-04 -2.280e-04 -2.632e-05 1.627e-04 9.552e-04
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.814e-03 1.201e-04 31.755 <2e-16 ***
## size_cm2 -2.625e-07 1.285e-07 -2.042 0.0442 *
## siteMSE -2.587e-04 1.282e-04 -2.018 0.0467 *
## siteTNW 8.201e-06 1.251e-04 0.066 0.9479
## siteMNW:zoneFore reef -2.184e-04 1.347e-04 -1.622 0.1086
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -1.861e-04 9.462e-05 -1.967 0.0524 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003145 on 86 degrees of freedom
## (51 observations deleted due to missingness)
## Multiple R-squared: 0.1273, Adjusted R-squared: 0.07654
## F-statistic: 2.509 on 5 and 86 DF, p-value: 0.03606
ggplot(data=mast,aes(x=log(host_het),y=log(size_cm2),color=site))+
geom_point()+
facet_wrap(~zone)+
stat_smooth(method="lm")+
theme_cowplot()## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 51 rows containing non-finite values (stat_smooth).
## Warning: Removed 51 rows containing missing values (geom_point).
#by site
mast.mnw <- subset(mast,site=="MNW")
mast.mse <- subset(mast,site=="MSE")
mast.tnw <- subset(mast,site=="TNW")
l1 <- lm(host_het~size_cm2,data=mast.mnw)
summary(l1) #ns##
## Call:
## lm(formula = host_het ~ size_cm2, data = mast.mnw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.388e-04 -2.020e-04 -7.810e-06 2.911e-04 6.030e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.678e-03 7.161e-05 51.368 <2e-16 ***
## size_cm2 -2.188e-07 1.177e-07 -1.859 0.074 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003112 on 27 degrees of freedom
## (18 observations deleted due to missingness)
## Multiple R-squared: 0.1134, Adjusted R-squared: 0.08059
## F-statistic: 3.454 on 1 and 27 DF, p-value: 0.07401
##
## Call:
## lm(formula = het.t ~ size.t, data = mast.mse)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2806 -0.6072 -0.0118 0.5513 1.7386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2660 0.1888 -1.409 0.178
## size.t 0.1688 0.2190 0.771 0.452
##
## Residual standard error: 0.7947 on 16 degrees of freedom
## (26 observations deleted due to missingness)
## Multiple R-squared: 0.03583, Adjusted R-squared: -0.02443
## F-statistic: 0.5945 on 1 and 16 DF, p-value: 0.4519
##
## Call:
## lm(formula = het.t ~ size.t + zone, data = mast.tnw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4298 -0.7081 -0.1531 0.7204 2.2100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5361 0.2053 2.611 0.0125 *
## size.t 0.1318 0.1763 0.748 0.4589
## zoneFore reef -0.3305 0.3254 -1.016 0.3155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.963 on 42 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.06582, Adjusted R-squared: 0.02133
## F-statistic: 1.48 on 2 and 42 DF, p-value: 0.2394
tnw.host <- mast.tnw[complete.cases(mast.tnw$host_het),]
tnw.host.host <- tnw.host[complete.cases(tnw.host$size_cm2),]
a1 <- aov(host_het~size_cm2+Error(zone),data=tnw.host.host)
summary(a1) #ns##
## Error: zone
## Df Sum Sq Mean Sq
## size_cm2 1 2.903e-07 2.903e-07
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## size_cm2 1 9.000e-09 9.310e-09 0.079 0.78
## Residuals 42 4.962e-06 1.181e-07
mnw.host <- mast.mnw[complete.cases(mast.mnw$host_het),]
mnw.host.host <- mnw.host[complete.cases(mnw.host$size_cm2),]
a1 <- aov(het.t~size.t+Error(zone),data=mnw.host.host)
summary(a1) #ns##
## Error: zone
## Df Sum Sq Mean Sq
## size.t 1 0.2494 0.2494
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## size.t 1 1.53 1.53 1.391 0.249
## Residuals 26 28.61 1.10
#install.packages("mblm")
library(mblm)
model.k <- mblm(host_het ~ size_cm2, data=mnw.host.host)
summary(model.k)##
## Call:
## mblm(formula = host_het ~ size_cm2, dataframe = mnw.host.host)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.731e-04 -2.823e-04 -6.775e-05 2.242e-04 5.047e-04
##
## Coefficients:
## Estimate MAD V value Pr(>|V|)
## (Intercept) 3.653e-03 1.305e-04 435 3.73e-09 ***
## size_cm2 2.170e-08 6.553e-07 271 0.256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003403 on 27 degrees of freedom
Nada
#there's more variation with the C but I think that's just number of samples things
plot(host_het~sym_type,data=mast) #ns## A1.A1ee A1.A1gf.A1g
## 1 3
## A3 C3ae.C3bj.C3f.C3.C3p.C3k
## 1 37
## C3ae.C3f.C3.C3bj C3ae.C3f.C3.C3bj.C3in.C3p.C3il.C3im
## 2 12
## C3ae.C3f.C3bj.C3.C3p.C3ch C3ae.C3g.C3f.C3.C3bj
## 22 9
## C3ae.C3g.C3f.C3.C3io.C3bj NA's
## 6 50
#taking out n of 1-2
lows <- c("A1.A1ee","A3","C3ae.C3f.C3.C3bj")
newdata <- mast[!mast$sym_type %in% lows,]
mast.less <- newdata[complete.cases(newdata$sym_type),]
plot(size.t~sym_type,data=mast.less) mast.less$sym_type <- gsub(".C","-C",mast.less$sym_type)
mast.less$sym_type <- gsub(".A","-A",mast.less$sym_type)
mast.less$sym_type <- gsub("C3ae-C3g-C3f-C3-C3bj","C3ae/C3g-C3f-C3-C3bj",mast.less$sym_type)
mast.less$sym_type <- gsub("C3ae-C3g-C3f-C3-C3io-C3bj","C3ae/C3g/C3f/C3/C3io-C3bj",mast.less$sym_type)
ggplot(mast.less,aes(x=sym_type,y=size.t,color=sym_type))+
geom_boxplot()+
theme_cowplot()+
theme(axis.text.x=element_text(angle=45,hjust=1),legend.position="none")+
xlab("Symbiont type")+
ylab("Colony size (transf.)")## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
#ggsave("sym.size.pdf",width=5,height=5)
a1 <- aov(size.t~sym_type+site/zone,data=mast.less)
summary(a1)## Df Sum Sq Mean Sq F value Pr(>F)
## sym_type 5 21.11 4.222 8.260 4.57e-06 ***
## site 2 1.62 0.812 1.588 0.212
## site:zone 2 13.79 6.897 13.495 1.29e-05 ***
## Residuals 64 32.71 0.511
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 15 observations deleted due to missingness
## Registered S3 methods overwritten by 'klaR':
## method from
## predict.rda vegan
## print.rda vegan
## plot.rda vegan
##
## Attaching package: 'agricolae'
## The following objects are masked from 'package:PerformanceAnalytics':
##
## kurtosis, skewness
## $statistics
## MSerror Df Mean CV
## 0.7077369 68 0.03630616 2317.158
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey sym_type 6 4.147238 0.05
##
## $means
## size.t std r Min
## A1-A1gf-A1g -1.04915768 0.6427173 3 -1.4746993
## C3ae-C3bj-C3f-C3-C3p-C3k 0.59115627 0.8204126 30 -0.9460429
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im -0.64142138 0.9172433 12 -1.8204724
## C3ae-C3f-C3bj-C3-C3p-C3ch -0.05881964 0.8985429 17 -1.5108395
## C3ae/C3g-C3f-C3-C3bj -0.05826682 0.8201305 9 -1.6400258
## C3ae/C3g/C3f/C3/C3io-C3bj -0.89305594 0.3369254 3 -1.2551285
## Max Q25 Q50
## A1-A1gf-A1g -0.3098213 -1.4188259 -1.36295248
## C3ae-C3bj-C3f-C3-C3p-C3k 2.0174870 0.1135068 0.51814395
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im 1.3426434 -1.3463720 -0.80292938
## C3ae-C3f-C3bj-C3-C3p-C3ch 1.6659433 -0.8221743 -0.01122879
## C3ae/C3g-C3f-C3-C3bj 0.7710662 -0.3398381 0.15801397
## C3ae/C3g/C3f/C3/C3io-C3bj -0.5887465 -1.0452107 -0.83529281
## Q75
## A1-A1gf-A1g -0.83638688
## C3ae-C3bj-C3f-C3-C3p-C3k 1.26174575
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im -0.02964506
## C3ae-C3f-C3bj-C3-C3p-C3ch 0.31508902
## C3ae/C3g-C3f-C3-C3bj 0.63127248
## C3ae/C3g/C3f/C3/C3io-C3bj -0.71201964
##
## $comparison
## NULL
##
## $groups
## size.t groups
## C3ae-C3bj-C3f-C3-C3p-C3k 0.59115627 a
## C3ae/C3g-C3f-C3-C3bj -0.05826682 ab
## C3ae-C3f-C3bj-C3-C3p-C3ch -0.05881964 ab
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im -0.64142138 b
## C3ae/C3g/C3f/C3/C3io-C3bj -0.89305594 b
## A1-A1gf-A1g -1.04915768 b
##
## attr(,"class")
## [1] "group"
##
## Shapiro-Wilk normality test
##
## data: mast$host_het
## W = 0.93948, p-value = 6.15e-05
het.t <- bestNormalize(mast$host_het)
#Standardized Box Cox Transformation with 114 nonmissing obs.
mast$het.t <- het.t$x.t
shapiro.test(mast$het.t)##
## Shapiro-Wilk normality test
##
## data: mast$het.t
## W = 0.97885, p-value = 0.06799
##
## Shapiro-Wilk normality test
##
## data: mast$bac_even
## W = 0.98352, p-value = 0.3597
##
## Call:
## lm(formula = het.t ~ bac_even + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8900 -0.5866 -0.0178 0.4957 2.2542
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.67115 0.63850 -2.617 0.0111 *
## bac_even 2.77751 1.08757 2.554 0.0131 *
## siteMSE 0.36463 0.40852 0.893 0.3755
## siteTNW 0.85694 0.38872 2.205 0.0311 *
## siteMNW:zoneFore reef 0.04207 0.40174 0.105 0.9169
## siteMSE:zoneFore reef -0.67144 0.34980 -1.920 0.0595 .
## siteTNW:zoneFore reef -0.15269 0.33987 -0.449 0.6548
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8577 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.2349, Adjusted R-squared: 0.162
## F-statistic: 3.223 on 6 and 63 DF, p-value: 0.007971
mast.het <- mast[complete.cases(mast$het.t),]
mast.het.bac <- mast.het[complete.cases(mast.het$bac_even),]
#subset fore reef
mast.het.bac.fore <- subset(mast.het.bac,zone=="Fore reef")
shapiro.test(mast.het.bac.fore$host_het)##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.fore$host_het
## W = 0.95932, p-value = 0.2048
##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.fore$bac_even
## W = 0.9642, p-value = 0.2887
##
## Call:
## lm(formula = host_het ~ bac_even + site, data = mast.het.bac.fore)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.724e-04 -9.672e-05 -1.669e-05 8.999e-05 5.437e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.774e-03 1.777e-04 15.614 < 2e-16 ***
## bac_even 1.508e-03 3.550e-04 4.248 0.000174 ***
## siteMSE -1.078e-04 8.274e-05 -1.303 0.201868
## siteTNW 2.530e-04 8.746e-05 2.893 0.006813 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000202 on 32 degrees of freedom
## Multiple R-squared: 0.4712, Adjusted R-squared: 0.4216
## F-statistic: 9.504 on 3 and 32 DF, p-value: 0.0001223
# lme1 <- lme(host_het ~ bac_even,data=mast.het.bac.fore,random=~1|site)
# summary(lme1)
#subset back reef
mast.het.bac.back <- subset(mast.het.bac,zone=="Back reef")
shapiro.test(log(mast.het.bac.back$host_het))##
## Shapiro-Wilk normality test
##
## data: log(mast.het.bac.back$host_het)
## W = 0.94907, p-value = 0.1152
##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.back$bac_even
## W = 0.94005, p-value = 0.06185
##
## Call:
## lm(formula = host_het ~ bac_even + site, data = mast.het.bac.back)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.008e-04 -2.400e-04 -5.481e-05 1.421e-04 6.883e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.444e-03 3.323e-04 10.363 1.98e-11 ***
## bac_even 1.569e-04 6.026e-04 0.260 0.796
## siteMSE 2.229e-05 1.636e-04 0.136 0.893
## siteTNW 2.131e-04 1.527e-04 1.396 0.173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003291 on 30 degrees of freedom
## Multiple R-squared: 0.09235, Adjusted R-squared: 0.001584
## F-statistic: 1.017 on 3 and 30 DF, p-value: 0.3987
#messy stats crap
#
# library(mblm)
# model.k <- mblm(het.t ~ bac_even, data=mast.het.bac)
# summary(model.k)
#
# mast.het.bac.mnwf <- subset(mast.het.bac,site=="MNW"&zone=="Fore reef")
# model.k <- mblm(host_het ~ bac_even, data=mast.het.bac.mnwf)
# summary(model.k)
# a1 <- lm(het.t~bac_even,data=mast.het.bac.mnwf)
# summary(a1) #p < 0.05*
# plot(a1)
#
# library(nlme)
# lme1 <- lme(het.t ~ bac_even,data=mast.het.bac,random=~1|zone)
# summary(lme1)
# mnwb <- subset(mast.mnw,zone=="Back reef")
# shapiro.test(mnwb$host_het)
# bestNormalize(mnwb$bac_even)
#
# lm1 <- lm(host_het~bac_even,data=mnwb)
# layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
# plot(lm1)#all info
# gg.het.even <- ggplot(data=mast,aes(x=het.t,y=bac_even,color=site,linetype=zone,shape=zone))+
# geom_point()+
# stat_smooth(method="lm",se=FALSE)+
# theme_cowplot()+
# #xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
# ylab("Evenness")+
# scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
# scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
# scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
# theme(axis.title.x=element_blank())
# gg.het.even
#
# #wrapped by site
# gg.het.even.site <- ggplot(data=mast,aes(x=het.t,y=bac_even,color=zone))+
# geom_point()+
# facet_wrap(~site)+
# stat_smooth(method="lm")+
# theme_cowplot()
# gg.het.even.site
#wrapped by zone
gg.het.even <- ggplot(data=mast,aes(x=host_het,y=bac_even))+
geom_point(aes(color=site,shape=site))+
facet_wrap(~zone)+
stat_smooth(method="lm",color="thistle4")+
#stat_smooth(method="lm",aes(color=site,linetype=site))+
theme_cowplot()+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
theme(axis.text.x = element_text(angle = 45, hjust=1))+
scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
ylab("Evennness")+
xlab("Host heterozygosity")+
xlim(0.0030,0.00433)
gg.het.even## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: mast$bac_shannon
## W = 0.9918, p-value = 0.8799
##
## Call:
## lm(formula = het.t ~ bac_shannon + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.86200 -0.63479 -0.03502 0.55229 2.16320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.48196 0.57654 -2.570 0.0125 *
## bac_shannon 0.56438 0.22210 2.541 0.0135 *
## siteMSE 0.40134 0.41324 0.971 0.3352
## siteTNW 0.91958 0.39489 2.329 0.0231 *
## siteMNW:zoneFore reef 0.07604 0.40387 0.188 0.8513
## siteMSE:zoneFore reef -0.75545 0.35784 -2.111 0.0387 *
## siteTNW:zoneFore reef -0.12715 0.34141 -0.372 0.7108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8581 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.2342, Adjusted R-squared: 0.1612
## F-statistic: 3.21 on 6 and 63 DF, p-value: 0.008166
##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.fore$host_het
## W = 0.95932, p-value = 0.2048
##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.fore$bac_shannon
## W = 0.94139, p-value = 0.05612
##
## Call:
## lm(formula = host_het ~ bac_shannon + site, data = mast.het.bac.fore)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.900e-04 -9.645e-05 -1.616e-05 9.263e-05 5.453e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.959e-03 1.473e-04 20.091 < 2e-16 ***
## bac_shannon 2.731e-04 6.952e-05 3.928 0.000428 ***
## siteMSE -1.470e-04 8.560e-05 -1.718 0.095546 .
## siteTNW 2.685e-04 9.136e-05 2.939 0.006069 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002074 on 32 degrees of freedom
## Multiple R-squared: 0.4421, Adjusted R-squared: 0.3898
## F-statistic: 8.452 on 3 and 32 DF, p-value: 0.0002804
## Linear mixed-effects model fit by REML
## Data: mast.het.bac.fore
## AIC BIC logLik
## -461.4152 -455.3098 234.7076
##
## Random effects:
## Formula: ~1 | site
## (Intercept) Residual
## StdDev: 0.0001994764 0.0002074846
##
## Fixed effects: host_het ~ bac_shannon
## Value Std.Error DF t-value p-value
## (Intercept) 0.0030240437 1.741088e-04 32 17.368698 0e+00
## bac_shannon 0.0002596103 6.887735e-05 32 3.769168 7e-04
## Correlation:
## (Intr)
## bac_shannon -0.723
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.7893026 -0.4653924 -0.1378766 0.5074773 2.7147133
##
## Number of Observations: 36
## Number of Groups: 3
##
## Shapiro-Wilk normality test
##
## data: log(mast.het.bac.back$host_het)
## W = 0.94907, p-value = 0.1152
##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.back$bac_shannon
## W = 0.95698, p-value = 0.1983
##
## Call:
## lm(formula = log(host_het) ~ bac_shannon, data = mast.het.bac.back)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16878 -0.07793 -0.00581 0.02756 0.18648
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.621452 0.060437 -93.013 <2e-16 ***
## bac_shannon -0.003603 0.032427 -0.111 0.912
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09078 on 32 degrees of freedom
## Multiple R-squared: 0.0003857, Adjusted R-squared: -0.03085
## F-statistic: 0.01235 on 1 and 32 DF, p-value: 0.9122
#looks good:
# gg.het.shan <- ggplot(data=mast,aes(x=het.t,y=bac_shannon,color=site,linetype=zone,shape=zone))+
# geom_point()+
# stat_smooth(method="lm",se=FALSE)+
# theme_cowplot()+
# xlab(" ")+
# ylab("Shannon index")+
# scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
# scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
# scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
# gg.het.shan
#wrapped by zone
gg.het.shan <- ggplot(data=mast,aes(x=host_het,y=bac_shannon))+
geom_point(aes(color=site,shape=site))+
facet_wrap(~zone)+
stat_smooth(method="lm",color="thistle4")+
theme_cowplot()+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
ylab("Shannon index")+
theme(axis.text.x = element_text(angle = 45, hjust=1))+
xlab("Host heterozygosity")+
xlim(0.0030,0.00433)
gg.het.shan## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_simpson)
## W = 0.97101, p-value = 0.0548
##
## Call:
## lm(formula = het.t ~ log(bac_simpson) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65551 -0.58018 0.00207 0.48132 2.20585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.16571 0.47039 -2.478 0.0159 *
## log(bac_simpson) 0.68735 0.26610 2.583 0.0121 *
## siteMSE 0.33167 0.40431 0.820 0.4151
## siteTNW 0.85611 0.38806 2.206 0.0310 *
## siteMNW:zoneFore reef 0.05197 0.40174 0.129 0.8975
## siteMSE:zoneFore reef -0.72255 0.35357 -2.044 0.0452 *
## siteTNW:zoneFore reef -0.05746 0.34577 -0.166 0.8685
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8568 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.2365, Adjusted R-squared: 0.1638
## F-statistic: 3.253 on 6 and 63 DF, p-value: 0.00754
##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.fore$host_het
## W = 0.95932, p-value = 0.2048
##
## Shapiro-Wilk normality test
##
## data: log(mast.het.bac.fore$bac_simpson)
## W = 0.91649, p-value = 0.00994
simp.t <- bestNormalize(mast.het.bac.fore$bac_simpson)
mast.het.bac.fore$simp.t <- simp.t$x.t
#Standardized Box Cox Transformation with 36 nonmissing obs.:
a1 <- lm(host_het~simp.t+site,data=mast.het.bac.fore)
summary(a1) #p < 0.001##
## Call:
## lm(formula = host_het ~ simp.t + site, data = mast.het.bac.fore)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.739e-04 -1.075e-04 -6.600e-07 1.165e-04 5.745e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.442e-03 6.503e-05 52.933 < 2e-16 ***
## simp.t 1.504e-04 4.138e-05 3.634 0.000968 ***
## siteMSE -1.273e-04 8.724e-05 -1.460 0.154120
## siteTNW 2.981e-04 9.715e-05 3.068 0.004364 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002125 on 32 degrees of freedom
## Multiple R-squared: 0.4146, Adjusted R-squared: 0.3597
## F-statistic: 7.554 on 3 and 32 DF, p-value: 0.0005888
## Linear mixed-effects model fit by REML
## Data: mast.het.bac.fore
## AIC BIC logLik
## -454.5606 -448.4552 231.2803
##
## Random effects:
## Formula: ~1 | site
## (Intercept) Residual
## StdDev: 0.0001730851 0.0002223005
##
## Fixed effects: host_het ~ bac_simpson
## Value Std.Error DF t-value p-value
## (Intercept) 0.003332809 1.210956e-04 32 27.522145 0.000
## bac_simpson 0.000049354 1.711503e-05 32 2.883682 0.007
## Correlation:
## (Intr)
## bac_simpson -0.474
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.5792155 -0.4097195 -0.1950739 0.6642891 2.5979382
##
## Number of Observations: 36
## Number of Groups: 3
## Warning in wilcox.test.default(z$intercepts): cannot compute exact p-value with
## ties
## Warning in wilcox.test.default(z$slopes): cannot compute exact p-value with ties
## Warning in wilcox.test.default(z$intercepts): cannot compute exact p-value with
## ties
## Warning in wilcox.test.default(z$slopes): cannot compute exact p-value with ties
##
## Call:
## mblm(formula = het.t ~ bac_simpson, dataframe = mast.het.bac.fore)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.22179 -0.70184 -0.02041 0.44400 2.28997
##
## Coefficients:
## Estimate MAD V value Pr(>|V|)
## (Intercept) -1.1297 1.0580 96 0.000203 ***
## bac_simpson 0.2627 0.3199 508 0.006114 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9359 on 34 degrees of freedom
##
## Shapiro-Wilk normality test
##
## data: log(mast.het.bac.back$host_het)
## W = 0.94907, p-value = 0.1152
##
## Shapiro-Wilk normality test
##
## data: mast.het.bac.back$bac_simpson
## W = 0.96632, p-value = 0.3676
##
## Call:
## lm(formula = log(host_het) ~ bac_simpson + site, data = mast.het.bac.back)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.14597 -0.06611 -0.01355 0.04203 0.17638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.654506 0.070935 -79.713 <2e-16 ***
## bac_simpson 0.000522 0.016047 0.033 0.974
## siteMSE 0.004211 0.045196 0.093 0.926
## siteTNW 0.056901 0.042154 1.350 0.187
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08935 on 30 degrees of freedom
## Multiple R-squared: 0.09225, Adjusted R-squared: 0.001471
## F-statistic: 1.016 on 3 and 30 DF, p-value: 0.3993
#looks good:
# gg.het.shan <- ggplot(data=mast,aes(x=het.t,y=bac_shannon,color=site,linetype=zone,shape=zone))+
# geom_point()+
# stat_smooth(method="lm",se=FALSE)+
# theme_cowplot()+
# xlab(" ")+
# ylab("Shannon index")+
# scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
# scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
# scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
# gg.het.shan
#wrapped by zone
gg.het.simp <- ggplot(data=mast,aes(x=host_het,y=bac_simpson))+
geom_point(aes(color=site,shape=site))+
facet_wrap(~zone)+
stat_smooth(method="lm",color="thistle4")+
theme_cowplot()+
theme(axis.text.x = element_text(angle = 45, hjust=1))+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
ylab("Simpson's index")+
xlab("Host heterozygosity")+
xlim(0.0030,0.00433)
gg.het.simp## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_rich)
## W = 0.98642, p-value = 0.5262
##
## Call:
## lm(formula = het.t ~ log(bac_rich) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75827 -0.68668 -0.03266 0.56815 1.83602
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.883792 1.476813 -1.276 0.2068
## log(bac_rich) 0.386405 0.338355 1.142 0.2578
## siteMSE 0.197491 0.422911 0.467 0.6421
## siteTNW 0.788421 0.413572 1.906 0.0612 .
## siteMNW:zoneFore reef -0.009629 0.419931 -0.023 0.9818
## siteMSE:zoneFore reef -0.662011 0.383936 -1.724 0.0896 .
## siteTNW:zoneFore reef -0.162770 0.360370 -0.452 0.6531
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8918 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.1728, Adjusted R-squared: 0.094
## F-statistic: 2.193 on 6 and 63 DF, p-value: 0.0552
#wrapped by zone
gg.het.rich <- ggplot(data=mast,aes(x=host_het,y=bac_rich))+
geom_point(aes(color=site,shape=site))+
facet_wrap(~zone)+
stat_smooth(method="lm",color="thistle4")+
#stat_smooth(method="lm",aes(color=site,linetype=site))+
theme_cowplot()+
theme(axis.text.x = element_text(angle = 45, hjust=1))+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
ylab("Bact. richness")+
xlab("Host heterozygosity")
gg.het.rich## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
# gg.het.rich <- ggplot(data=mast,aes(x=het.t,y=log(bac_rich),color=site,linetype=zone,shape=zone))+
# geom_point()+
# stat_smooth(method="lm",se=FALSE)+
# theme_cowplot()+
# #xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
# ylab("ASV richness")+
# scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
# scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
# scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
# theme(axis.title.x=element_blank())
# gg.het.rich##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_phylo)
## W = 0.98859, p-value = 0.6734
##
## Call:
## lm(formula = het.t ~ log(bac_phylo) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.69723 -0.67985 -0.02846 0.57500 1.76471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.51515 1.34623 -1.125 0.2647
## log(bac_phylo) 0.44951 0.45991 0.977 0.3321
## siteMSE 0.10113 0.40970 0.247 0.8058
## siteTNW 0.73665 0.40626 1.813 0.0746 .
## siteMNW:zoneFore reef -0.07387 0.41575 -0.178 0.8595
## siteMSE:zoneFore reef -0.59531 0.37084 -1.605 0.1134
## siteTNW:zoneFore reef -0.18926 0.35852 -0.528 0.5994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8942 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.1683, Adjusted R-squared: 0.08906
## F-statistic: 2.124 on 6 and 63 DF, p-value: 0.06271
gg.het.phyl <- ggplot(data=mast,aes(x=host_het,y=bac_phylo))+
geom_point(aes(color=site,shape=site))+
facet_wrap(~zone)+
stat_smooth(method="lm",color="thistle4")+
#stat_smooth(method="lm",aes(color=site,linetype=site))+
theme_cowplot()+
theme(axis.text.x = element_text(angle = 45, hjust=1))+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
ylab("Bact. Faith's D")+
xlab("Host heterozygosity")
gg.het.phyl## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
# gg.size.phyl <- ggplot(data=mast,aes(x=size.t,y=log(bac_phylo),color=site,linetype=zone,shape=zone))+
# geom_point()+
# stat_smooth(method="lm",se=FALSE)+
# theme_cowplot()+
# xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
# ylab("Faith's D")+
# scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
# scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
# scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
# gg.size.phylgg.het.bac.sig <- ggarrange(gg.het.even,gg.het.shan,gg.het.simp,nrow=3,ncol=1,labels="AUTO",common.legend=TRUE,legend="right")## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
gg.het.bac.ns <- ggarrange(gg.het.rich,gg.het.phyl,nrow=2,ncol=1,labels="AUTO",common.legend=TRUE,legend="right")## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
#ggsave(gg.het.bac.sig,file="het.bac.sig.pdf",height=8,width=4.5)
#ggsave(gg.het.bac.ns,file="het.bac.ns.pdf")#subset for correlations
host.bac <- mast[,c(7:8,12:16)]
chart.Correlation(host.bac, histogram=TRUE, pch=19)# summary(lm(host_het~bac_rich,data=mast))
# summary(lm(host_het~bac_even,data=mast))
# summary(lm(host_het~bac_phylo,data=mast))
# summary(lm(host_het~bac_shannon,data=mast))
# summary(lm(host_het~bac_simpson,data=mast))
#
# summary(lm(size_cm2~bac_rich,data=mast))
# summary(lm(size_cm2~bac_even,data=mast))
# summary(lm(size_cm2~bac_phylo,data=mast))
# summary(lm(size_cm2~bac_shannon,data=mast))
# summary(lm(size_cm2~bac_simpson,data=mast))
#other one - looks cool
#install.packages("ggcorrplot")
# library(ggcorrplot)
# library(dplyr)
#
# model.matrix(~0+., data=host.bac) %>%
# cor(use="pairwise.complete.obs") %>%
# ggcorrplot(show.diag = F, type="lower", lab=TRUE, lab_size=2)
ggplot(data=mast,aes(x=log(host_het),y=log(bac_even),color=zone))+
geom_point()+
facet_wrap(~site)+
stat_smooth(method="lm")+
theme_cowplot()## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
ggplot(data=mast,aes(x=log(host_het),y=log(bac_shannon),color=site))+
geom_point()+
facet_wrap(~zone)+
stat_smooth(method="lm")+
theme_cowplot()## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
ggplot(data=mast,aes(x=log(host_het),y=log(bac_simpson),color=site))+
geom_point()+
facet_wrap(~zone)+
stat_smooth(method="lm")+
theme_cowplot()## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: mast$bac_even
## W = 0.98352, p-value = 0.3597
##
## Call:
## lm(formula = host_het ~ bac_even + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.312e-04 -1.782e-04 -2.149e-05 1.369e-04 7.809e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.081e-03 2.063e-04 14.934 <2e-16 ***
## bac_even 8.599e-04 3.514e-04 2.447 0.0172 *
## siteMSE 9.774e-05 1.320e-04 0.740 0.4618
## siteTNW 2.656e-04 1.256e-04 2.115 0.0384 *
## siteMNW:zoneFore reef -2.231e-06 1.298e-04 -0.017 0.9863
## siteMSE:zoneFore reef -2.073e-04 1.130e-04 -1.834 0.0713 .
## siteTNW:zoneFore reef -5.737e-05 1.098e-04 -0.522 0.6032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002771 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.2292, Adjusted R-squared: 0.1558
## F-statistic: 3.122 on 6 and 63 DF, p-value: 0.009645
## [1] 17 48
#
# shapiro.test(mast.mnw$bac_even)
#
# l2 <- lm(host_het~bac_even+zone,data=mast.mnw)
# summary(l2)
#
# l3 <- lm(host_het~bac_even+zone,data=mast.mse)
# summary(l3)
#
# l4 <- lm(host_het~bac_even+zone,data=mast.tnw)
# summary(l4)
#
# qqPlot(m1)
shapiro.test(mast$bac_shannon)##
## Shapiro-Wilk normality test
##
## data: mast$bac_shannon
## W = 0.9918, p-value = 0.8799
##
## Call:
## lm(formula = het.t ~ bac_shannon + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.86200 -0.63479 -0.03502 0.55229 2.16320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.48196 0.57654 -2.570 0.0125 *
## bac_shannon 0.56438 0.22210 2.541 0.0135 *
## siteMSE 0.40134 0.41324 0.971 0.3352
## siteTNW 0.91958 0.39489 2.329 0.0231 *
## siteMNW:zoneFore reef 0.07604 0.40387 0.188 0.8513
## siteMSE:zoneFore reef -0.75545 0.35784 -2.111 0.0387 *
## siteTNW:zoneFore reef -0.12715 0.34141 -0.372 0.7108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8581 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.2342, Adjusted R-squared: 0.1612
## F-statistic: 3.21 on 6 and 63 DF, p-value: 0.008166
## [1] 17 48
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_simpson)
## W = 0.97101, p-value = 0.0548
##
## Call:
## lm(formula = het.t ~ log(bac_simpson) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65551 -0.58018 0.00207 0.48132 2.20585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.16571 0.47039 -2.478 0.0159 *
## log(bac_simpson) 0.68735 0.26610 2.583 0.0121 *
## siteMSE 0.33167 0.40431 0.820 0.4151
## siteTNW 0.85611 0.38806 2.206 0.0310 *
## siteMNW:zoneFore reef 0.05197 0.40174 0.129 0.8975
## siteMSE:zoneFore reef -0.72255 0.35357 -2.044 0.0452 *
## siteTNW:zoneFore reef -0.05746 0.34577 -0.166 0.8685
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8568 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.2365, Adjusted R-squared: 0.1638
## F-statistic: 3.253 on 6 and 63 DF, p-value: 0.00754
## [1] 17 48
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_phylo)
## W = 0.98859, p-value = 0.6734
##
## Call:
## lm(formula = het.t ~ log(bac_phylo) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.69723 -0.67985 -0.02846 0.57500 1.76471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.51515 1.34623 -1.125 0.2647
## log(bac_phylo) 0.44951 0.45991 0.977 0.3321
## siteMSE 0.10113 0.40970 0.247 0.8058
## siteTNW 0.73665 0.40626 1.813 0.0746 .
## siteMNW:zoneFore reef -0.07387 0.41575 -0.178 0.8595
## siteMSE:zoneFore reef -0.59531 0.37084 -1.605 0.1134
## siteTNW:zoneFore reef -0.18926 0.35852 -0.528 0.5994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8942 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.1683, Adjusted R-squared: 0.08906
## F-statistic: 2.124 on 6 and 63 DF, p-value: 0.06271
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_rich)
## W = 0.98642, p-value = 0.5262
##
## Call:
## lm(formula = het.t ~ log(bac_rich) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75827 -0.68668 -0.03266 0.56815 1.83602
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.883792 1.476813 -1.276 0.2068
## log(bac_rich) 0.386405 0.338355 1.142 0.2578
## siteMSE 0.197491 0.422911 0.467 0.6421
## siteTNW 0.788421 0.413572 1.906 0.0612 .
## siteMNW:zoneFore reef -0.009629 0.419931 -0.023 0.9818
## siteMSE:zoneFore reef -0.662011 0.383936 -1.724 0.0896 .
## siteTNW:zoneFore reef -0.162770 0.360370 -0.452 0.6531
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8918 on 63 degrees of freedom
## (73 observations deleted due to missingness)
## Multiple R-squared: 0.1728, Adjusted R-squared: 0.094
## F-statistic: 2.193 on 6 and 63 DF, p-value: 0.0552
size.t <- bestNormalize(mast$size_cm2)
#Standardized Yeo-Johnson Transformation with 109 nonmissing obs.
mast$size.t <- size.t$x.t
shapiro.test(mast$size.t)##
## Shapiro-Wilk normality test
##
## data: mast$size.t
## W = 0.98683, p-value = 0.3641
##
## Shapiro-Wilk normality test
##
## data: mast$bac_even
## W = 0.98352, p-value = 0.3597
##
## Call:
## lm(formula = size.t ~ bac_even + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.68250 -0.51649 -0.01966 0.63430 1.54280
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.8624 0.5042 1.710 0.0921 .
## bac_even 0.2461 0.9765 0.252 0.8018
## siteMSE -0.7466 0.3133 -2.383 0.0202 *
## siteTNW -0.7863 0.3108 -2.530 0.0139 *
## siteMNW:zoneFore reef -1.3211 0.3160 -4.181 9.14e-05 ***
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -0.8049 0.3227 -2.494 0.0153 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8339 on 63 degrees of freedom
## (74 observations deleted due to missingness)
## Multiple R-squared: 0.3276, Adjusted R-squared: 0.2742
## F-statistic: 6.138 on 5 and 63 DF, p-value: 0.0001079
#all info
gg.size.even <- ggplot(data=mast,aes(x=size.t,y=bac_even,color=site,linetype=zone,shape=zone))+
geom_point()+
stat_smooth(method="lm",se=FALSE)+
theme_cowplot()+
#xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
ylab("Evenness")+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
theme(axis.title.x=element_blank())
gg.size.even## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
# #wrapped by site
# ggplot(data=mast,aes(x=log(size_cm2),y=log(bac_even),color=zone))+
# geom_point()+
# facet_wrap(~site)+
# stat_smooth(method="lm")+
# theme_cowplot()
#
# #wrapped by zone
# ggplot(data=mast,aes(x=log(size_cm2),y=log(bac_even),color=site))+
# geom_point()+
# stat_smooth(method="lm")+
# facet_wrap(~zone)+
# theme_cowplot()##
## Shapiro-Wilk normality test
##
## data: mast$bac_shannon
## W = 0.9918, p-value = 0.8799
##
## Call:
## lm(formula = size.t ~ bac_shannon + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.67243 -0.51293 -0.01862 0.63926 1.52001
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.89713 0.46180 1.943 0.0565 .
## bac_shannon 0.04028 0.20518 0.196 0.8450
## siteMSE -0.74487 0.31725 -2.348 0.0220 *
## siteTNW -0.78193 0.31457 -2.486 0.0156 *
## siteMNW:zoneFore reef -1.31809 0.31669 -4.162 9.75e-05 ***
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -0.80450 0.32399 -2.483 0.0157 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.834 on 63 degrees of freedom
## (74 observations deleted due to missingness)
## Multiple R-squared: 0.3273, Adjusted R-squared: 0.2739
## F-statistic: 6.131 on 5 and 63 DF, p-value: 0.0001092
gg.size.shan <- ggplot(data=mast,aes(x=size.t,y=bac_shannon,color=site,linetype=zone,shape=zone))+
geom_point()+
stat_smooth(method="lm",se=FALSE)+
theme_cowplot()+
xlab(" ")+
ylab("Shannon index")+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
gg.size.shan## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_simpson)
## W = 0.97101, p-value = 0.0548
##
## Call:
## lm(formula = size.t ~ log(bac_simpson) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.66530 -0.51652 -0.02691 0.60941 1.54948
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.87065 0.38976 2.234 0.0291 *
## log(bac_simpson) 0.09246 0.28167 0.328 0.7438
## siteMSE -0.74323 0.31309 -2.374 0.0207 *
## siteTNW -0.78307 0.31109 -2.517 0.0144 *
## siteMNW:zoneFore reef -1.32193 0.31587 -4.185 9e-05 ***
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -0.78724 0.33035 -2.383 0.0202 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8336 on 63 degrees of freedom
## (74 observations deleted due to missingness)
## Multiple R-squared: 0.3281, Adjusted R-squared: 0.2747
## F-statistic: 6.151 on 5 and 63 DF, p-value: 0.0001058
gg.size.simp <- ggplot(data=mast,aes(x=size.t,y=log(bac_simpson),color=site,linetype=zone,shape=zone))+
geom_point()+
stat_smooth(method="lm",se=FALSE)+
theme_cowplot()+
xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
ylab("Inv. Simpson index")+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
ylim(0,2.25)
gg.size.simp## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_rich)
## W = 0.98642, p-value = 0.5262
##
## Call:
## lm(formula = size.t ~ log(bac_rich) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65729 -0.51673 -0.01987 0.64024 1.47864
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.85395 1.39658 0.611 0.543098
## log(bac_rich) 0.02941 0.32902 0.089 0.929046
## siteMSE -0.75055 0.32152 -2.334 0.022779 *
## siteTNW -0.78398 0.32432 -2.417 0.018543 *
## siteMNW:zoneFore reef -1.31821 0.31905 -4.132 0.000108 ***
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -0.80599 0.33014 -2.441 0.017453 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8342 on 63 degrees of freedom
## (74 observations deleted due to missingness)
## Multiple R-squared: 0.327, Adjusted R-squared: 0.2736
## F-statistic: 6.122 on 5 and 63 DF, p-value: 0.0001107
gg.size.rich <- ggplot(data=mast,aes(x=size.t,y=log(bac_rich),color=site,linetype=zone,shape=zone))+
geom_point()+
stat_smooth(method="lm",se=FALSE)+
theme_cowplot()+
#xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
ylab("ASV richness")+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
theme(axis.title.x=element_blank())
gg.size.rich## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
##
## Shapiro-Wilk normality test
##
## data: log(mast$bac_phylo)
## W = 0.98859, p-value = 0.6734
##
## Call:
## lm(formula = size.t ~ log(bac_phylo) + site/zone, data = mast)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65445 -0.48462 0.00767 0.63242 1.48410
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02084 1.25056 0.017 0.9868
## log(bac_phylo) 0.33734 0.43453 0.776 0.4405
## siteMSE -0.73271 0.31029 -2.361 0.0213 *
## siteTNW -0.73039 0.31876 -2.291 0.0253 *
## siteMNW:zoneFore reef -1.31247 0.31488 -4.168 9.54e-05 ***
## siteMSE:zoneFore reef NA NA NA NA
## siteTNW:zoneFore reef -0.76337 0.32608 -2.341 0.0224 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8303 on 63 degrees of freedom
## (74 observations deleted due to missingness)
## Multiple R-squared: 0.3333, Adjusted R-squared: 0.2804
## F-statistic: 6.299 on 5 and 63 DF, p-value: 8.452e-05
gg.size.phyl <- ggplot(data=mast,aes(x=size.t,y=log(bac_phylo),color=site,linetype=zone,shape=zone))+
geom_point()+
stat_smooth(method="lm",se=FALSE)+
theme_cowplot()+
xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
ylab("Faith's D")+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
gg.size.phyl## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
gg.size.bac.all <- ggarrange(gg.size.rich,gg.size.even,gg.size.shan,gg.size.simp,gg.size.phyl,nrow=3,ncol=2,labels="AUTO",common.legend=TRUE,legend="right")## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
##the sammple sizes across syms are crazy uneven
#plot(bac_rich~sym_type,data=mast) #ns
# summary(aov(bac_rich~sym_type_top,data=mast)) #ns
# plot(bac_phylo~sym_type,data=mast) #ns
# summary(aov(bac_phylo~sym_type_top,data=mast)) #ns
#mast$bac<- complete.cases(mast$bac_even)
#sample numbers:
# plot(mast$sym_type_top)
#
#plot(bac_even~sym_type,data=mast)
# a.sym.bac.even <- aov(bac_even~sym_type_top,data=mast) #sig but A3 is the most different & it has almost no samples
# summary(a.sym.bac.even)
# TukeyHSD(a.sym.bac.even)
#
plot(bac_shannon~sym_type,data=mast) ##.sym.bac.shan <- aov(bac_shannon~sym_type_top,data=mast) #sig but A3 is the most different & it has almost no samples
# summary(a.sym.bac.shan) #sig
# TukeyHSD(a.sym.bac.shan)
#
plot(bac_simpson~sym_type,data=mast) ## a.sym.bac.simp <- aov(bac_simpson~sym_type_top,data=mast) #sig but A3 is the most different & it has almost no samples
# summary(a.sym.bac.simp) #sig
# TukeyHSD(a.sym.bac.simp)
plot(bac_rich~sym_type,data=mast)host.bac <- mast[,c(7:8,12:16)]
chart.Correlation(host.bac, histogram=TRUE, pch=19)
host.bac.mnw <- mast.mnw[,c(7:8,12:16)]
chart.Correlation(host.bac.mnw, histogram=TRUE, pch=19)
host.bac.mse <- mast.mse[,c(7:8,12:16)]
chart.Correlation(host.bac.mse, histogram=TRUE, pch=19)
host.bac.tnw <- mast.tnw[,c(7:8,12:16)]
chart.Correlation(host.bac.tnw, histogram=TRUE, pch=19)
summary(lm(log(host_het)~log(bac_even),data=mast.mnw)) #ns
summary(lm(log(host_het)~log(bac_even),data=mast.mse)) #ns
summary(lm(log(host_het)~log(bac_even),data=mast.tnw)) #ns
summary(lm(host_het~bac_even,data=mast.mnw)) #ns
summary(lm(host_het~bac_even,data=mast.mse)) #ns
summary(lm(host_het~bac_even,data=mast.tnw)) #ns
# library(nlme)
#
# bac <- mast[complete.cases(mast$bac_even),]
# host.bac <- bac[complete.cases(bac$host_het),]
#
# host.bac.mnw <- subset(host.bac,site=="MNW")
#
# lme.host.bac <- lme(host_het~bac_even+site+zone,data=host.bac,random=~1)
# summary(lm(host_het~bac_even+site+zone,data=host.bac))
# plot(lme.host.bac)
# anova <- anova(lme.host.bac)
# anova
#
# summary(lm(host_het~bac_even,data=host.bac.mnw,random=~1|zone))library(vegan)
data(mite)
data(mite.env)
data(mite.pcnm)
# Two explanatory data frames -- Hellinger-transform Y
mod <- varpart(mite, mite.env, mite.pcnm, transfo="hel")
mod##
## Partition of variance in RDA
##
## Call: varpart(Y = mite, X = mite.env, mite.pcnm, transfo = "hel")
## Species transformation: hellinger
## Explanatory tables:
## X1: mite.env
## X2: mite.pcnm
##
## No. of explanatory tables: 2
## Total variation (SS): 27.205
## Variance: 0.39428
## No. of observations: 70
##
## Partition table:
## Df R.squared Adj.R.squared Testable
## [a+b] = X1 11 0.52650 0.43670 TRUE
## [b+c] = X2 22 0.62300 0.44653 TRUE
## [a+b+c] = X1+X2 33 0.75893 0.53794 TRUE
## Individual fractions
## [a] = X1|X2 11 0.09141 TRUE
## [b] 0 0.34530 FALSE
## [c] = X2|X1 22 0.10124 TRUE
## [d] = Residuals 0.46206 FALSE
## ---
## Use function 'rda' to test significance of fractions of interest
## Test fraction [a] using partial RDA, '~ .' in formula tells to use
## all variables of data mite.env.
aFrac <- rda(decostand(mite, "hel"), mite.env, mite.pcnm)
anova(aFrac)## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: rda(X = decostand(mite, "hel"), Y = mite.env, Z = mite.pcnm)
## Df Variance F Pr(>F)
## Model 11 0.053592 1.8453 0.001 ***
## Residual 36 0.095050
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $r.squared
## [1] 0.1359251
##
## $adj.r.squared
## [1] 0.09140797
##
## Partition of squared Bray distance in dbRDA
##
## Call: varpart(Y = vegdist(mite), X = mite.env, mite.pcnm)
##
## Explanatory tables:
## X1: mite.env
## X2: mite.pcnm
##
## No. of explanatory tables: 2
## Total variation (SS): 14.696
## No. of observations: 70
##
## Partition table:
## Df R.squared Adj.R.squared Testable
## [a+b] = X1 11 0.50512 0.41127 TRUE
## [b+c] = X2 22 0.60144 0.41489 TRUE
## [a+b+c] = X1+X2 33 0.74631 0.51375 TRUE
## Individual fractions
## [a] = X1|X2 11 0.09887 TRUE
## [b] 0 0.31240 FALSE
## [c] = X2|X1 22 0.10249 TRUE
## [d] = Residuals 0.48625 FALSE
## ---
## Use function 'dbrda' to test significance of fractions of interest
## Three explanatory tables with formula interface
mod <- varpart(mite, ~ SubsDens + WatrCont, ~ Substrate + Shrub + Topo,
mite.pcnm, data=mite.env, transfo="hel")
mod##
## Partition of variance in RDA
##
## Call: varpart(Y = mite, X = ~SubsDens + WatrCont, ~Substrate + Shrub +
## Topo, mite.pcnm, data = mite.env, transfo = "hel")
## Species transformation: hellinger
## Explanatory tables:
## X1: ~SubsDens + WatrCont
## X2: ~Substrate + Shrub + Topo
## X3: mite.pcnm
##
## No. of explanatory tables: 3
## Total variation (SS): 27.205
## Variance: 0.39428
## No. of observations: 70
##
## Partition table:
## Df R.square Adj.R.square Testable
## [a+d+f+g] = X1 2 0.32677 0.30667 TRUE
## [b+d+e+g] = X2 9 0.40395 0.31454 TRUE
## [c+e+f+g] = X3 22 0.62300 0.44653 TRUE
## [a+b+d+e+f+g] = X1+X2 11 0.52650 0.43670 TRUE
## [a+c+d+e+f+g] = X1+X3 24 0.67372 0.49970 TRUE
## [b+c+d+e+f+g] = X2+X3 31 0.72400 0.49884 TRUE
## [a+b+c+d+e+f+g] = All 33 0.75893 0.53794 TRUE
## Individual fractions
## [a] = X1 | X2+X3 2 0.03910 TRUE
## [b] = X2 | X1+X3 9 0.03824 TRUE
## [c] = X3 | X1+X2 22 0.10124 TRUE
## [d] 0 0.01407 FALSE
## [e] 0 0.09179 FALSE
## [f] 0 0.08306 FALSE
## [g] 0 0.17045 FALSE
## [h] = Residuals 0.46206 FALSE
## Controlling 1 table X
## [a+d] = X1 | X3 2 0.05317 TRUE
## [a+f] = X1 | X2 2 0.12216 TRUE
## [b+d] = X2 | X3 9 0.05231 TRUE
## [b+e] = X2 | X1 9 0.13003 TRUE
## [c+e] = X3 | X1 22 0.19303 TRUE
## [c+f] = X3 | X2 22 0.18429 TRUE
## ---
## Use function 'rda' to test significance of fractions of interest
## Use RDA to test fraction [a]
## Matrix can be an argument in formula
rda.result <- rda(decostand(mite, "hell") ~ SubsDens + WatrCont +
Condition(Substrate + Shrub + Topo) +
Condition(as.matrix(mite.pcnm)), data = mite.env)
anova(rda.result)## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = decostand(mite, "hell") ~ SubsDens + WatrCont + Condition(Substrate + Shrub + Topo) + Condition(as.matrix(mite.pcnm)), data = mite.env)
## Df Variance F Pr(>F)
## Model 2 0.013771 2.6079 0.004 **
## Residual 36 0.095050
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Four explanatory tables
mod <- varpart(mite, ~ SubsDens + WatrCont, ~Substrate + Shrub + Topo,
mite.pcnm[,1:11], mite.pcnm[,12:22], data=mite.env, transfo="hel")
mod##
## Partition of variance in RDA
##
## Call: varpart(Y = mite, X = ~SubsDens + WatrCont, ~Substrate + Shrub +
## Topo, mite.pcnm[, 1:11], mite.pcnm[, 12:22], data = mite.env, transfo =
## "hel")
## Species transformation: hellinger
## Explanatory tables:
## X1: ~SubsDens + WatrCont
## X2: ~Substrate + Shrub + Topo
## X3: mite.pcnm[, 1:11]
## X4: mite.pcnm[, 12:22]
##
## No. of explanatory tables: 4
## Total variation (SS): 27.205
## Variance: 0.39428
## No. of observations: 70
##
## Partition table:
## Df R.square Adj.R.square Testable
## [aeghklno] = X1 2 0.32677 0.30667 TRUE
## [befiklmo] = X2 9 0.40395 0.31454 TRUE
## [cfgjlmno] = X3 11 0.53231 0.44361 TRUE
## [dhijkmno] = X4 11 0.09069 -0.08176 TRUE
## [abefghiklmno] = X1+X2 11 0.52650 0.43670 TRUE
## [acefghjklmno] = X1+X3 13 0.59150 0.49667 TRUE
## [adeghijklmno] = X1+X4 13 0.40374 0.26533 TRUE
## [bcefgijklmno] = X2+X3 20 0.63650 0.48813 TRUE
## [bdefhijklmno] = X2+X4 20 0.53338 0.34292 TRUE
## [cdfghijklmno] = X3+X4 22 0.62300 0.44653 TRUE
## [abcefghijklmno] = X1+X2+X3 22 0.67947 0.52944 TRUE
## [abdefghijklmno] = X1+X2+X4 22 0.61553 0.43557 TRUE
## [acdefghijklmno] = X1+X3+X4 24 0.67372 0.49970 TRUE
## [bcdefghijklmno] = X2+X3+X4 31 0.72400 0.49884 TRUE
## [abcdefghijklmno] = All 33 0.75893 0.53794 TRUE
## Individual fractions
## [a] = X1 | X2+X3+X4 2 0.03910 TRUE
## [b] = X2 | X1+X3+X4 9 0.03824 TRUE
## [c] = X3 | X1+X2+X4 11 0.10237 TRUE
## [d] = X4 | X1+X2+X3 11 0.00850 TRUE
## [e] 0 0.01407 FALSE
## [f] 0 0.13200 FALSE
## [g] 0 0.05355 FALSE
## [h] 0 0.00220 FALSE
## [i] 0 -0.00547 FALSE
## [j] 0 -0.00963 FALSE
## [k] 0 -0.00231 FALSE
## [l] 0 0.24037 FALSE
## [m] 0 -0.03474 FALSE
## [n] 0 0.02730 FALSE
## [o] 0 -0.06761 FALSE
## [p] = Residuals 0 0.46206 FALSE
## Controlling 2 tables X
## [ae] = X1 | X3+X4 2 0.05317 TRUE
## [ag] = X1 | X2+X4 2 0.09265 TRUE
## [ah] = X1 | X2+X3 2 0.04131 TRUE
## [be] = X2 | X3+X4 9 0.05231 TRUE
## [bf] = X2 | X1+X4 9 0.17024 TRUE
## [bi] = X2 | X1+X3 9 0.03277 TRUE
## [cf] = X3 | X1+X4 11 0.23437 TRUE
## [cg] = X3 | X2+X4 11 0.15592 TRUE
## [cj] = X3 | X1+X2 11 0.09274 TRUE
## [dh] = X4 | X2+X3 11 0.01071 TRUE
## [di] = X4 | X1+X3 11 0.00303 TRUE
## [dj] = X4 | X1+X2 11 -0.00113 TRUE
## Controlling 1 table X
## [aghn] = X1 | X2 2 0.12216 TRUE
## [aehk] = X1 | X3 2 0.05306 TRUE
## [aegl] = X1 | X4 2 0.34709 TRUE
## [bfim] = X2 | X1 9 0.13003 TRUE
## [beik] = X2 | X3 9 0.04452 TRUE
## [befl] = X2 | X4 9 0.42468 TRUE
## [cfjm] = X3 | X1 11 0.19000 TRUE
## [cgjn] = X3 | X2 11 0.17359 TRUE
## [cfgl] = X3 | X4 11 0.52830 TRUE
## [dijm] = X4 | X1 11 -0.04134 TRUE
## [dhjn] = X4 | X2 11 0.02837 TRUE
## [dhik] = X4 | X3 11 0.00292 TRUE
## ---
## Use function 'rda' to test significance of fractions of interest
## Show values for all partitions by putting 'cutoff' low enough:
plot(mod, cutoff = -Inf, cex = 0.7, bg=2:5)sym.data <- read.csv("~/nicfall drive/Moorea_revisions/mrits_revised/symportal_profile_counts.csv",header=T)
sym.meta <- read.csv("~/nicfall drive/Moorea_revisions/mrits_revised/mrits_sampledata copy.csv",header=T)
sym.sub <- mast[complete.cases(mast$sym_clade),]
sym.meta$sample_num <- sym.meta$Sample
sym.merge <- merge(sym.meta,sym.sub,by="sample_num")
sym.merge.het <- sym.merge[complete.cases(sym.merge$host_het),]
sym.site <- sym.merge.het[,c(7:8)]
sym.host <- sym.merge.het[,c(18)]
sym.data.less <- sym.data[sym.data$Sample %in% c(sym.merge.het$sample_num),]
row.names(sym.data.less) <- sym.data.less$Sample
sym.data2 <- sym.data.less[,2:10]
mod <- varpart(sym.data2, sym.site, sym.host, transfo="hel")
mod
## Use fill colours
showvarparts(2, bg = c("hotpink","skyblue"))
plot(mod, bg = c("hotpink","skyblue"))
## Test fraction [a] using partial RDA, '~ .' in formula tells to use
## all variables of data mite.env.
aFrac <- rda(decostand(sym.data2, "hel"), sym.site, sym.host)
anova(aFrac)
## RsquareAdj gives the same result as component [a] of varpart
RsquareAdj(aFrac)
sym.hell <- decostand(sym.data2, 'hell') # we are planning to do tb-RDA, this is Hellinger pre-transformation
rda(formula = sym.hell ~ site.x*zone.x, data = sym.site)